# Python 3.5
##############################################################################################################
#  Heatmaps and Hexabins
##############################################################################################################
import pandas as pd
import seaborn as sns
from pylab import *
import matplotlib.pyplot as plt
import os
import numpy as np

pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_columns', 250)

path_data = '/home/emd/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)/Data/Python_code_merging/'
path_figure = '/home/emd/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)/Manuscript/Figures/'

myfile = pd.read_stata(os.path.join(path_data, 'Data_Merged_11_20_stata.dta'))
myfile["electrified_hh_nbr_log"] = np.log(myfile["electrified_hh_nbr"]+1)
myfile["SH_sum_11_log"] = np.log(myfile["SH_sum_11"]+1)
myfile["_2K_sum_11_log"] = np.log(myfile["_2K_sum_11"]+1)
myfile["_3K_sum_11_log"] = np.log(myfile["_3K_sum_11"]+1)
myfile["_5K_sum_11_log"] = np.log(myfile["_5K_sum_11"]+1)
myfile.SH_mean_11.describe()

import numpy as np

##############################################################################################
# BOXPLOTS
##############################################################################################
colnames = ['electrified_hh_nbr_log', 'SH_sum_11_log']
subdata = myfile[colnames]
nonnull_values = subdata['SH_sum_11_log'][subdata['SH_sum_11_log'] != 0].dropna()
q20, q40, q60, q80 = np.percentile(nonnull_values, [20, 40, 60, 80])

subdata['x_var'] = '-'
subdata.loc[subdata[subdata['SH_sum_11_log'] == 0].index, 'x_var'] = '0'
subdata.loc[subdata[(subdata['SH_sum_11_log'] > 0) & (subdata['SH_sum_11_log'] < q20)].index, 'x_var'] = '0 - 20'
subdata.loc[subdata[(subdata['SH_sum_11_log'] >= q20) & (subdata['SH_sum_11_log'] < q40)].index, 'x_var'] = '20 - 40'
subdata.loc[subdata[(subdata['SH_sum_11_log'] >= q40) & (subdata['SH_sum_11_log'] < q60)].index, 'x_var'] = '40 - 60'
subdata.loc[subdata[(subdata['SH_sum_11_log'] >= q60) & (subdata['SH_sum_11_log'] < q80)].index, 'x_var'] = '60 - 80'
subdata.loc[subdata[subdata['SH_sum_11_log'] >= q80].index, 'x_var'] = '> 80'
subdata['x_var'].value_counts()
subdata = subdata[subdata['x_var'] != '-']

subdata = subdata.rename(columns={'electrified_hh_nbr_log': 'Log of number of electrified households',
                                  'x_var': 'Log of sum of nightlights within village boundaries (percentiles)'})

sns.set_palette("Greys")

ax = sns.boxplot(x='Log of sum of nightlights within village boundaries (percentiles)',
                 y="Log of number of electrified households",
                 data=subdata,
                 order=['0', '0 - 20', '20 - 40', '40 - 60', '60 - 80', '> 80'])

savefig(os.path.join(path_figure, 'boxplots_percentiles_new.pdf'), bbox_inches='tight')
close()



#################################################################################################################
# MAIN HEATMAP FOR MAIN MANUSCRIPT
##################################################################################################################
# HEAT MAP - measures of electrification with different measures of NL
list(myfile.columns.values)
colnames = ['electrified_hh_nbr', 'electrified_hh_nbr_log', 'electrified_hh_per',
            'API_max_2011',
            'API_mean_2011',
            'SH_mean_11',
            'SH_sum_11',
            'SH_sum_11_log',
            '_5K_sum_11_log',
            '_3K_sum_11_log',
            '_2K_sum_11_log',
            'BIptval_11',
            'PTval_11'
            ]
subdata = myfile[colnames]
subdata = subdata.rename(columns={'electrified_hh_per': 'Elec %', 'electrified_hh_nbr': 'Elec #',
                         'electrified_hh_nbr_log': 'Log elec #', 'SH_sum_11': 'Shape sum',
                                  'SH_sum_11_log': 'Log shape sum', 'PTval_11': 'Pt', 'BIptval_11': 'Bi',
                                  '_2K_sum_11_log': 'Log 2k sum', '_3K_sum_11_log': 'Log 3k sum', 'SH_mean_11': 'Shape mean',
                                  '_5K_sum_11_log': 'Log 5k sum', 'API_mean_2011': 'Api mean', 'API_max_2011': 'Api max'})

subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log', 'SH_sum_11': 'ShSum',
                                  'SH_sum_11_log': 'log ShSum', 'PTval_11': 'Pt', 'BIptval_11': 'Bi',
                                  '_2K_sum_11_log': 'log 2kSum', '_3K_sum_11_log': 'log 3kSum', 'SH_mean_11': 'ShMean',
                                  '_5K_sum_11_log': 'log 5kSum', 'API_mean_2011': 'ApiMean', 'API_max_2011': 'ApiMax'})


sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Measures for year 2011')
# Generate a custom diverging colormap
# cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.set_palette("Greys")

sns.heatmap(corr, mask=mask, vmin=0, vmax=1, cmap="Greys",
            square=True, annot=True, annot_kws={"size": 10},
            linewidths=2, cbar_kws={"shrink": .3}, ax=ax)
yticks, xticks = corr.index, corr.index
plt.yticks(rotation=0)
plt.xticks(rotation=90)

savefig(os.path.join(path_figure, 'heatmap_NL_new.pdf'), bbox_inches='tight')
savefig(os.path.join(path_figure, 'heatmap_NL_new.png'), bbox_inches='tight')
plt.close()

###############################################################################
# OTHER HEATMAPS FOR APPENDIX
###############################################################################
# 1)
# HEAT MAP - MEAN
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', 'SH_mean_11', 'API_mean_2011',
            '_2K_mean_11', '_3K_mean_11', '_5K_mean_11', 'PTval_11', 'BIptval_11']
subdata = myfile[colnames]
subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log',
                                  'SH_mean_11': 'Sh', 'PTval_11': 'Pt', 'BIptval_11': 'Bi',
                                  '_2K_mean_11': '2k', '_3K_mean_11': '3k',
                                  '_5K_mean_11': '5k', 'API_mean_2011': 'Api' })


sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Correlations with MEANS of various NL data - year 2011')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap,  vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=5, cbar_kws={"shrink": .3}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_mean.png'), bbox_inches='tight')
plt.close()


###############################################################################
# 2)
# HEAT MAP - MIN
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', 'SH_min_11', 'API_min_2011',
            '_2K_min_11', '_3K_min_11', '_5K_min_11']
subdata = myfile[colnames]
subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log',
                                  'SH_min_11': 'Sh',
                                  '_2K_min_11': '2k', '_3K_min_11': '3k',
                                  '_5K_min_11': '5k', 'API_min_2011': 'Api'})
sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Correlations with MIN of various NL data - year 2011')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap,  vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=.5, cbar_kws={"shrink": .4}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_min.png'), bbox_inches='tight')
plt.close()

###############################################################################
# 3)
# HEAT MAP - MAX
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', 'SH_max_11', 'API_max_2011',
            '_2K_max_11', '_3K_max_11', '_5K_max_11']
subdata = myfile[colnames]
subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log',
                                  'SH_max_11': 'Sh',
                                  '_2K_max_11': '2k', '_3K_max_11': '3k',
                                  '_5K_max_11': '5k', 'API_max_2011': 'Api'})
sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Correlations with MAX of various NL data - year 2011')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap,  vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_max.png'), bbox_inches='tight')
plt.close()

###############################################################################
# 4)
# HEAT MAP - SUM
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', 'SH_sum_11_log',
            '_2K_sum_11_log', '_3K_sum_11_log', '_5K_sum_11_log', 'SH_sum_11',
            '_2K_sum_11', '_3K_sum_11', '_5K_sum_11']
subdata = myfile[colnames]
subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log',
                                  'SH_sum_11_log': 'logSh', '_2K_sum_11_log': 'log2k', '_3K_sum_11_log': 'log3k',
                                  '_5K_sum_11_log': 'log5k', 'SH_sum_11': 'Sh',
                                  '_2K_sum_11': '2k', '_3K_sum_11': '3k',
                                  '_5K_sum_11': '5k'})
sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Correlations with log(SUM) for sh, 2k, 3k and 5k - year 2011')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap,  vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_sum.png'), bbox_inches='tight')
plt.close()


###############################################################################
# 5) SHAPE
# HEAT MAP - measures of electrification with different measures of NL (min, max, mean etc...)
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', 'SH_sum_11', 'SH_sum_11_log',
            'SH_mean_11', 'SH_median_11', 'SH_min_11', 'SH_max_11', 'SH_std_11', 'SH_variety_11',
            'SH_majority_11']
subdata = myfile[colnames]

subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log',
                                  'SH_mean_11': 'Mean', 'SH_median_11': 'Median',
                                  'SH_min_11': 'Min', 'SH_max_11': 'Max', 'SH_std_11': 'SD',
                                  'SH_sum_11': 'Sum', 'SH_sum_11_log': 'logSum', 'SH_variety_11': 'Var',
                                  'SH_majority_11': 'Mode'})
sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Measures for year 2011 for shape file data')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_shape.png'), bbox_inches='tight')
plt.close()

###############################################################################
# 6) 2K - HEAT MAP
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', '_2K_sum_11', '_2K_sum_11_log',
            '_2K_mean_11', '_2K_median_11', '_2K_min_11', '_2K_max_11',
            '_2K_std_11', '_2K_variety_11', '_2K_majority_11']
subdata = myfile[colnames]

subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log',
                                  '_2K_mean_11': 'Mean', '_2K_median_11': 'Median',
                                  '_2K_min_11': 'Min', '_2K_max_11': 'Max', '_2K_std_11': 'SD',
                                  '_2K_sum_11': 'Sum', '_2K_sum_11_log': 'logSum', '_2K_variety_11': 'Var',
                                  '_2K_majority_11': 'Mode'})
sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Measures for year 2011 for 2K file data')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_2K.png'), bbox_inches='tight')
plt.close()


###############################################################################
# 7) 3K - HEAT MAP
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', '_3K_sum_11', '_3K_sum_11_log',
            '_3K_mean_11', '_3K_median_11', '_3K_min_11', '_3K_max_11',
            '_3K_std_11', '_3K_variety_11', '_3K_majority_11']
subdata = myfile[colnames]
subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log', '_3K_sum_11_log': 'logSum',
                                  '_3K_mean_11': 'Mean', '_3K_median_11': 'Median',
                                  '_3K_min_11': 'Min', '_3K_max_11': 'Max', '_3K_std_11': 'SD',
                                  '_3K_sum_11': 'Sum', '_3K_variety_11': 'Var', '_3K_majority_11': 'Mode'})
sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Measures for year 2011 for 3K file data')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_3K.png'), bbox_inches='tight')
plt.close()



###############################################################################
# 8) 5K - HEAT MAP
list(myfile.columns.values)
colnames = ['electrified_hh_per', 'electrified_hh_nbr_log', 'electrified_hh_nbr', '_5K_sum_11_log', '_5K_sum_11',
            '_5K_mean_11', '_5K_median_11', '_5K_min_11', '_5K_max_11',
            '_5K_std_11', '_5K_variety_11', '_5K_majority_11']
subdata = myfile[colnames]

subdata = subdata.rename(columns={'electrified_hh_per': 'elec %', 'electrified_hh_nbr': 'elec nbr',
                         'electrified_hh_nbr_log': 'elec nbr log', '_5K_sum_11': 'Sum',
                                  '_5K_mean_11': 'Mean', '_5K_median_11': 'Median',
                                  '_5K_min_11': 'Min', '_5K_max_11': 'Max', '_5K_std_11': 'SD',
                                  '_5K_sum_11_log': 'logSum', '_5K_variety_11': 'Var', '_5K_majority_11': 'Mode'})
sns.set()
sns.set(style="white")
# d = subdata
d = subdata.dropna()
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))
ax.set_title('Measures for year 2011 for 5K file data')
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmin=0, vmax=1,
            square=True, annot=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
savefig(os.path.join(path_figure, 'heatmap_5K.png'), bbox_inches='tight')
plt.close()


##############################################################################################
# HEXABIN PLOTS
##############################################################################################
import numpy as np
myfile["SH_sum_11"].describe()
myfile["electrified_hh_nbr"].describe()

# FOR VARIABLES: SH_sum_11_log and electrified_hh_nbr_log
# Hexbin plot with marginal distributions
myfile1 = myfile.rename(columns={
    'SH_sum_11_log': 'Log of sum of nightlights within village boundaries',
    'electrified_hh_nbr_log': 'Log number of electrified households'})
sns.set_style("ticks")
gr = sns.jointplot(x='Log of sum of nightlights within village boundaries', y='Log number of electrified households', data=myfile1, kind='hex', gridsize=15, stat_func=None, color='black')
gr = gr.plot_joint(sns.regplot, scatter=False, lowess=True, line_kws={"color": 'grey', "lw": 1})

gr.savefig(os.path.join(path_figure, 'SH_logsum_electrified_hh_nbrlog_11_hexabin_new.png'), bbox_inches='tight')
gr.savefig(os.path.join(path_figure, 'SH_logsum_electrified_hh_nbrlog_11_hexabin_new.pdf'), bbox_inches='tight')
close()


# FOR VARIABLES: SH_sum_11_log and electrified_hh_per (percentage)
# Hexbin plot with marginal distributions
myfile1 = myfile.rename(columns={'SH_sum_11_log': 'Night lights (shape, log sum, 2011)',
                                 'electrified_hh_per': 'Households with electricity (%)'})
gr = sns.jointplot(x='Night lights (shape, log sum, 2011)', y='Households with electricity (%)', data=myfile1, kind='hex', gridsize=8)
gr = gr.plot_joint(sns.regplot, scatter=False, lowess=True, line_kws={"color": 'grey', "lw": 1})
gr.savefig(os.path.join(path_figure, 'SH_logsum_electrified_hh_per_11_hexabin.png'), bbox_inches='tight')
close()



##############################################################################################
# HEXABIN PLOTS PER STATE
##############################################################################################
from scipy.stats.stats import pearsonr

state_dict = {1: 'Jammu & Kashmir', 2: 'Himachal Pradesh', 3: 'Punjab', 4: 'Chandigarh', 5: 'Uttranchal', 6: 'Haryana',
              7: 'Delhi', 8: 'Rajasthan', 9: 'Uttar Pradesh', 10: 'Bihar', 11: 'Sikkim', 12: 'Arunachal Pradesh',
              13: 'Nagaland', 14: 'Manipur', 15: 'Mizoram', 16: 'Tripura', 17: 'Meghalaya', 18: 'Assam',
              19: 'West Bengal', 20: 'Jharkhand', 21: 'Orissa', 22: 'Chhattisgarh', 23: 'Madhya Pradesh',
              24: 'Gujarat', 25: 'Daman & Diu', 26: 'Dadra & Nagar Haveli', 27: 'Maharashtra', 28: 'Andhra Pradesh',
              29: 'Karnataka', 30: 'Goa', 31: 'Lakshdweep', 32: 'Kerala', 33: 'Tamil Nadu', 34: 'Pondicherry',
              35: 'Andaman & Nicobar Islands'}

colname = ['St11', 'SH_sum_11_log', 'electrified_hh_nbr_log']
myfile1 = myfile[colname]
myfile1 = myfile1.dropna()
myfile1.loc[:, 'St11'] = myfile1.loc[:, 'St11'].astype(np.int)
myfile1 = myfile1.rename(columns={'SH_sum_11_log': 'Night lights (shape, log sum, 2011)', 'electrified_hh_nbr_log': 'Number of electrified households (log)'})
myfile1['St11'].value_counts()

data_corr = pd.DataFrame(None, columns={'Pearson', 'N_obs'})
x = 'Night lights (shape, log sum, 2011)'
y = 'Number of electrified households (log)'

for state_code in range(1, 35):
    print(state_code)
    myfile_st = myfile1[myfile1['St11'] == state_code]
    data_corr.loc[state_code, 'Pearson'] = pearsonr(myfile_st[x], myfile_st[y])[0]
    data_corr.loc[state_code, 'N_obs'] = myfile_st.shape[0]
    if myfile_st.shape[0] > 90:
        try:
            gr = sns.jointplot(x='Night lights (shape, log sum, 2011)', y='Number of electrified households (log)',
                               data=myfile_st, kind='hex', gridsize=15, xlim=(0, 8), ylim=(0, 9))
            gr = gr.plot_joint(sns.regplot, scatter=False, lowess=True, line_kws={"color": 'grey', "lw": 1})
            plt.subplots_adjust(top=0.92)
            gr.fig.suptitle(state_dict[state_code], fontsize=16, fontweight='bold')
            gr.savefig(os.path.join(path_figure,
                                    'SH_logsum_electrified_hh_nbrlog_11_hexabinSTATE_' + str(state_code) + '.png'),
                       bbox_inches='tight')
            close()
        except Exception as exception:
            print(exception.args[0])
            pass

import os
data_corr = data_corr.reset_index()
data_corr = data_corr.rename(columns={'index': 'State_code'})
path_data = '/home/emd/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)/Data/Python_code_merging/'
data_corr.to_csv(os.path.join(path_data, 'data_corr_perstate.csv'), sep=',', header=True, index=True, na_rep='')



##############################################################################################
# PLOTS for distance
##############################################################################################
path_data = '/home/emd/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)/Data/Python_code_merging/Data_Merged_11_20_stata.dta'

data = pd.read_stata(path_data)
data['Distance'].describe()
data['Distance'][data['Distance']<50].hist(bins=10)



data1 = data.rename(columns={'SH_sum_11_log': 'Night lights (shape, log sum, 2011)', 'electrified_hh_nbr_log': 'Number of electrified households (log)'})
data1 = data1[(data1['Distance'] >= 40) & (data1['Distance'] < 60)]
gr = sns.jointplot(x='Night lights (shape, log sum, 2011)', y='Number of electrified households (log)', data=data1, kind='hex', gridsize=15)
gr = gr.plot_joint(sns.regplot, scatter=False, lowess=True, line_kws={"color": 'grey', "lw": 1})
gr.savefig(os.path.join(path_figure, 'SH_logsum_electrified_hh_nbrlog_11_hexabin.png'), bbox_inches='tight')
close()


data1 = data.rename(columns={'SH_sum_11_log': 'Night lights (shape, log sum, 2011)', 'electrified_hh_nbr_log': 'Number of electrified households (log)'})
data1 = data1[(data1['Distance'] >= 80) & (data1['Distance'] < 100)]
gr = sns.jointplot(x='Night lights (shape, log sum, 2011)', y='Number of electrified households (log)', data=data1, kind='hex', gridsize=15)
gr = gr.plot_joint(sns.regplot, scatter=False, lowess=True, line_kws={"color": 'grey', "lw": 1})
gr.savefig(os.path.join(path_figure, 'SH_logsum_electrified_hh_nbrlog_11_hexabin.png'), bbox_inches='tight')
close()






